import urllib2
import re
from myrequest import u_agent

class Spider:
    """Scrape posts from duanziwang.com listing pages into a text file.

    Typical use is ``spider.deal_page(spider.load_page(n))``: ``load_page``
    downloads one listing page and returns the raw HTML of each post body,
    and ``deal_page`` cleans every post and passes it to ``write_page``.
    """

    # Captures the inner HTML of each ``<div class="post-content">`` block;
    # re.S lets ``.`` span the newlines inside a post.
    _POST_PATTERN = re.compile(r'<div\sclass="post-content">(.*?)</div>', re.S)
    # ``[^>]`` (unlike ``.`` without re.S) also matches newlines, so tags
    # whose attributes are wrapped over several lines are removed as well.
    _TAG_PATTERN = re.compile(r'<[^>]*>')
    _WHITESPACE_PATTERN = re.compile(r'\s+')
    # Cleaned posts must be longer than this (per ``len``) to be written.
    _MIN_LENGTH = 10

    def __init__(self, output_path='duanzi.txt'):
        """Create a spider.

        Args:
            output_path: File that ``write_page`` appends to.
        """
        self.url = ''            # URL of the most recently requested page
        self.page = None         # page number of the most recent request
        self.output_path = output_path

    def load_page(self, page=1):
        """Download one listing page and return its post bodies.

        Args:
            page: Page number inserted into the listing URL.

        Returns:
            A list with the raw inner HTML of every ``post-content`` div
            found in the response; empty when nothing matches.

        Raises:
            urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
                request fails.
        """
        self.page = page
        self.url = 'https://duanziwang.com/page/' + str(self.page) + '/'

        # The header name must be hyphenated: with "User_Agent" the custom
        # agent string never replaces urllib2's default User-agent header.
        headers = {"User-Agent": u_agent()}
        request = urllib2.Request(self.url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            html = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()

        return self._extract_posts(html)

    def _extract_posts(self, html):
        """Return the inner HTML of every post-content div in *html*."""
        return self._POST_PATTERN.findall(html)

    def deal_page(self, content_list):
        """Clean each scraped post and write the ones that are long enough.

        Every item has its HTML tags and all whitespace removed; results
        whose length exceeds ``_MIN_LENGTH`` are passed to ``write_page``.

        Args:
            content_list: Iterable of raw post HTML strings, as returned by
                ``load_page``.
        """
        for content in content_list:
            out = self._TAG_PATTERN.sub('', content)
            out = self._WHITESPACE_PATTERN.sub('', out)

            # No strip() needed: all whitespace is already gone.
            if len(out) > self._MIN_LENGTH:
                self.write_page(out)

    def write_page(self, item):
        """Append *item* followed by ``'\\r\\n'`` to the output file."""
        with open(self.output_path, 'a') as f:
            f.write(item + '\r\n')

if __name__ == '__main__':
    # Script entry point: fetch listing page 2, then clean and save its posts.
    spider = Spider()
    posts = spider.load_page(2)
    spider.deal_page(posts)
